

# reshape the conjoint data so that each row is one respondent x conjoint
# task x candidate profile. the code below builds 3 conjoint tasks x 2
# candidates = 6 rows per respondent, and there are around 700 completed
# responses, so the cleaned data frame should have roughly 700 x 6 = 4200 rows.

# Keep character columns as character (not factor) when base R builds data
# frames. This is already the default from R 4.0 onward; setting it explicitly
# keeps the script's behaviour the same on older R versions.
options(stringsAsFactors = FALSE)

library(dplyr)
library(tidyr)
library(forcats)
library(stringr)
library(assertthat)
library(data.table)


# Load the cleaned survey export and keep only the columns this script needs:
# the respondent id, the randomization columns (names starting with a capital
# "F"; the match is case-sensitive), and every column whose name contains
# "conjoint".
# The former `rename(responseid = responseid)` step renamed the column to its
# own name, i.e. did nothing, so it has been dropped.
conj <- readRDS("Modified Data/Clean.RDS") %>%
  dplyr::select(responseid,
                starts_with("F", ignore.case = FALSE),
                contains("conjoint"))


# responses ---------------------------------------------------------------


# Response-order randomization spreads each conjoint answer over several
# columns. Collect the answer columns (interest and win-probability items) and
# bucket them by their trailing ".1" / ".2" / ".3" suffix; whatever carries
# none of those suffixes goes into the "0" bucket.
is_answer_var <- grepl("^conjoint[0-9]_interest|^conjoint[0-9]_prob", names(conj))
conjoint_answer_vars <- names(conj)[is_answer_var]

conjoint_1_vars <- grep("\\.1$", conjoint_answer_vars, value = TRUE)
conjoint_2_vars <- grep("\\.2$", conjoint_answer_vars, value = TRUE)
conjoint_3_vars <- grep("\\.3$", conjoint_answer_vars, value = TRUE)

suffixed_vars <- c(conjoint_1_vars, conjoint_2_vars, conjoint_3_vars)
conjoint_0_vars <- conjoint_answer_vars[!conjoint_answer_vars %in% suffixed_vars]


# Work out which column suffix holds the answers for each combination of
# question order and answer order.
#
# Step 1: for every suffix bucket, flag the respondents whose columns in that
# bucket are all blank ("").
answer_var_sets <- list(
  unseen0 = conjoint_0_vars,
  unseen1 = conjoint_1_vars,
  unseen2 = conjoint_2_vars,
  unseen3 = conjoint_3_vars
)
not_seen <- lapply(answer_var_sets, function(vars) {
  apply(conj[, vars, drop = FALSE], 1, function(x) all(x == ""))
})

# Bind the flags under explicit column names instead of relying on the
# auto-generated V1..V4 names produced by the deprecated as_data_frame().
not_seen <- as.data.frame(do.call(cbind, not_seen))
not_seen$conjoint_answer_order <- conj$conjoint_answer_order
not_seen$conjoint_question_order <- conj$conjoint_question_order

# Step 2: within each randomization cell, compute the share of respondents for
# whom each bucket is blank. The bucket with the smallest blank share is taken
# to be the one that cell actually answered.
suffix <- not_seen %>%
  group_by(conjoint_answer_order, conjoint_question_order) %>%
  summarise(suff0 = mean(unseen0),
            suff1 = mean(unseen1),
            suff2 = mean(unseen2),
            suff3 = mean(unseen3))
thesuffix <- apply(suffix[, c("suff0", "suff1", "suff2", "suff3")], 1, which.min)

# which.min() is 1-based over suff0..suff3, so subtract 1 to get the suffix
# number (0 = no suffix).
suffix$thesuffix <- thesuffix - 1

# now standardize variables: collapse the suffixed copies of each answer into
# a single column per question.
#
# The standardized name is the unsuffixed column name with every literal ".1"
# removed.
newvars <- gsub(".1", "", conjoint_0_vars, fixed = TRUE)

for (i in seq_along(conjoint_0_vars)) {

  # the variable without a ".X" suffix, and the column it is collapsed into
  cj0var <- conjoint_0_vars[i]
  newvarname <- newvars[i]

  # Accumulate the answers in a local vector and write the column once at the
  # end. This way a source column is never blanked while it is still being
  # read, even if the standardized name equals the unsuffixed name.
  collapsed <- rep(NA_character_, nrow(conj))

  # loop through randomization conditions and fill in answers
  for (j in seq_len(nrow(suffix))) {

    # old variable that holds the answers for this randomization condition
    this_suff <- suffix$thesuffix[j]
    oldvar <- if (this_suff != 0) paste0(cj0var, ".", this_suff) else cj0var

    # the conditions that this old var corresponds to
    this_q <- suffix$conjoint_question_order[j]
    this_a <- suffix$conjoint_answer_order[j]

    # take the old variable's answers for respondents in this condition and
    # leave everyone else as they were
    in_condition <- conj$conjoint_answer_order == this_a &
      conj$conjoint_question_order == this_q
    collapsed <- ifelse(in_condition, conj[[oldvar]], collapsed)
  }

  conj[[newvarname]] <- collapsed
}

# Sanity check on the collapsed answers. Note that, despite its name,
# pct_nonmis holds the share of *blank* ("") answers in each collapsed column;
# every column must be blank for fewer than 35% of respondents.
pct_nonmis <- vapply(conj[, newvars, drop = FALSE],
                     function(x) mean(x == ""),
                     numeric(1))
assert_that(all(pct_nonmis < .35))

# the suffixed originals are redundant once collapsed, so drop them
redundant_vars <- c(conjoint_0_vars, conjoint_1_vars,
                    conjoint_2_vars, conjoint_3_vars)
conj <- select(conj, -one_of(redundant_vars))


# reshape: go from one column per (conjoint, outcome, candidate) answer to one
# row per respondent x conjoint x candidate, with the two outcomes
# ("interest" and "winprob") side by side.
temp_responses <- conj[, c("responseid", newvars), drop = FALSE]

# stack all answer columns, then split each column name into its parts
long_answers <- pivot_longer(temp_responses, -responseid)
long_answers <- mutate(long_answers,
                       conjoint = str_extract(name, "conjoint[0-9]+"),
                       outcome = str_remove(name, "conjoint[0-9]+_"))

# `candidate` must be derived from the raw outcome text (e.g. "interest_1")
# before `outcome` itself is overwritten with the cleaned label
long_answers <- mutate(long_answers,
                       conjoint = as.integer(str_remove(conjoint, "conjoint")),
                       candidate = as.integer(str_remove(outcome, "interest_|prob_|")),
                       outcome = case_when(grepl("prob", outcome) ~ "winprob",
                                           grepl("interest", outcome) ~ "interest"))

responses <- long_answers %>%
  select(responseid, conjoint, candidate, outcome, value) %>%
  pivot_wider(names_from = outcome, values_from = value) %>%
  arrange(responseid, candidate, conjoint)


# randomization -----------------------------------------------------------

# Decode the "F." randomization column names into their parts. Going by the
# character positions read below: position 3 is the conjoint number, the last
# character is the attribute slot, and for names longer than 5 characters
# position 5 is the candidate number. 5-character names have no candidate
# part and are coded as candidate 0.
randvars <- grep("^F\\.", names(conj), value = TRUE)
name_width <- nchar(randvars)

randomization <- data.frame(
  var = randvars,
  conjoint = as.integer(substr(randvars, 3, 3)),
  attribute = substr(randvars, name_width, name_width),
  candidate = ifelse(name_width == 5, 0, as.integer(substr(randvars, 5, 5)))
)

# readable replacement name for each randomization column
randomization$rename <- paste0("cand_", randomization$candidate,
                               "_attribute_", randomization$attribute,
                               "_conjoint_", randomization$conjoint)


# Map every attribute label found in the data to a short R- and
# Stata-friendly name. Labels are read from the candidate-0 randomization
# columns; labels without an entry in the lookup get NA as their new name.
attr_vars <- randomization$var[randomization$candidate == 0]
attrs <- unique(unlist(conj[, attr_vars]))

attr_lookup <- c(
  "Annual salary:" = "salary",
  "Opponent's experience:" = "opp_exp",
  "You'd need to raise:" = "fundraising",
  "Opponent's campaign advertising:" = "advertising",
  "Opponent's ideology:" = "opp_ideol"
)

shown_attrs <- attrs[attrs != ""]
attr_names <- data.frame(
  orig = shown_attrs,
  new = unname(attr_lookup[match(shown_attrs, names(attr_lookup))])
)

# NOTE: this step used to add empty "<attribute>_cand_<k>_conjoint_<j>"
# placeholder columns to `conj`. Nothing in this script ever filled or read
# those columns, and `conj` itself is discarded by the workspace clean-up
# after the merge, so they are no longer created. The per-candidate attribute
# levels are assembled in `new_rand` instead.

# now fill in those attributes
temp_rand <- subset(conj, select = c("responseid", randomization$var))
setDT(temp_rand, key = "responseid")
setnames(temp_rand, old = randomization$var, new = randomization$rename)

# One row per respondent x conjoint task x candidate, plus one (initially
# empty) column per attribute to hold that candidate's level.
# NOTE: expand.grid() defaults to stringsAsFactors = TRUE, so a character
# responseid is stored as a factor here.
new_rand <- expand.grid(responseid = unique(temp_rand$responseid),
                        conjoint = 1:3,
                        cand = 1:2)
for (v in attr_names$new) {
  new_rand[[v]] <- NA_character_
}

# Columns of temp_rand that say which attribute was shown in each slot of
# each conjoint task.
ats <- randomization$rename[grepl("cand_0", randomization$rename)]

# The conjoint number (last character) and the attribute slot (character 18,
# right after "cand_0_attribute_") are encoded in the column name and are the
# same for every respondent, so parse them once instead of inside the loop.
ats_conjoint <- as.integer(substr(ats, nchar(ats), nchar(ats)))
ats_number <- substr(ats, 18, 18)

for (i in seq_len(nrow(temp_rand))) {

  the_respondent <- temp_rand[i, ]

  # rows of new_rand that belong to this respondent (same for every attribute)
  respondent_rows <- new_rand$responseid == the_respondent$responseid

  for (k in seq_along(ats)) {

    # a = variable name in temp_rand that says which attribute and which
    #     conjoint task
    # new_at = variable name in new_rand
    a <- ats[k]
    cj <- ats_conjoint[k]
    at_number <- ats_number[k]

    new_at <- attr_names$new[attr_names$orig == the_respondent[[a]]]
    task_rows <- respondent_rows & new_rand$conjoint == cj

    # copy the attribute level shown for candidate 1, then candidate 2
    for (cand in 1:2) {
      atlevel_vname <- paste0("cand_", cand, "_attribute_", at_number,
                              "_conjoint_", cj)
      new_rand[task_rows & new_rand$cand == cand, new_at] <-
        the_respondent[[atlevel_vname]]
    }
  }
}
names(new_rand)[names(new_rand) == "cand"] <- "candidate"


# combine randomization and response data. Both tables are expected to hold
# the same number of rows, and the full join on respondent, candidate and
# conjoint task must not add any rows.
rows_before_merge <- nrow(responses)
assert_that(nrow(responses) == nrow(new_rand))

merge_keys <- c("responseid", "candidate", "conjoint")
conjoint <- plyr::join(responses, new_rand, by = merge_keys, type = "full")

rows_after_merge <- nrow(conjoint)
assert_that(rows_before_merge == rows_after_merge)

# clean up workspace: everything except `conjoint` (and `newnames`, if it
# exists) is removed, including the helper objects defined just above
keep_objects <- c("conjoint", "newnames")
rm(list = setdiff(ls(), keep_objects))

# recode higher office interest into one variable
dat <- readRDS("Modified Data/Clean.RDS")

# The answer is spread over three columns. Take the first one that is neither
# missing nor blank, checking interest, then interest.1, then interest.2.
# Missing values are excluded from the masks explicitly: an NA inside a
# logical index would make the subscripted assignment fail.
dat$run_interest <- Reduce(
  function(current, candidate) {
    take <- is.na(current) & !is.na(candidate) & candidate != ""
    current[take] <- candidate[take]
    current
  },
  list(dat$interest, dat$interest.1, dat$interest.2),
  rep(NA_character_, nrow(dat))
)

# shorten the full answer text to compact labels
dat$run_interest[grep("I have no interest", dat$run_interest)] <- "No interest"
dat$run_interest[grep("I am open to the possibility", dat$run_interest)] <- "Open to possibility"
dat$run_interest[grep("I am actively considering", dat$run_interest)] <- "Actively considering"

# Drop the source columns; this also keeps `dat$interest` from colliding with
# the conjoint outcome column of the same name in the join below.
dat$interest <- NULL
dat$interest.1 <- NULL
dat$interest.2 <- NULL


# join demos into long response DF ----------------------------------------
conjoint <- plyr::join(conjoint, dat, by = "responseid", type = "left")

# create and order factors for treatments

# First relabel the two text treatments that need it: opponent experience
# gets shorter labels (after trimming surrounding whitespace), and opponent
# ideology is collapsed by extremity regardless of direction.
conjoint$opp_exp <- car::recode(trimws(conjoint$opp_exp),
                               "'Mayor of a town of 15,000 people' = 'Mayor';
                               'Is the incumbent' = 'Incumbent';
                               'Never held elected office' = 'No prior experience'")

conjoint$opp_ideol <- car::recode(conjoint$opp_ideol,
                                 "c('Somewhat liberal', 'Somewhat conservative') = 'Somewhat ideological';
                                 c('Very liberal', 'Very conservative') = 'Very ideological'")

# Then turn every treatment into a factor with its levels in the order listed
# here. Values not listed as a level become NA.
treatment_levels <- list(
  salary = c("$15,000", "$50,000", "$80,000"),
  opp_exp = c("No prior experience", "Mayor", "Incumbent"),
  fundraising = c("$25,000", "$100,000", "$300,000"),
  advertising = c("No negative ads", "Mostly negative ads"),
  opp_ideol = c("Moderate", "Somewhat ideological", "Very ideological")
)
for (trt in names(treatment_levels)) {
  conjoint[[trt]] <- factor(conjoint[[trt]], levels = treatment_levels[[trt]])
}



# recode outcome as ordinal: lower-case the interest answers, then map the
# five answer labels onto integers from -2 to 2
conjoint$interest <- as.integer(
  car::recode(tolower(conjoint$interest),
                                "'not interested' = -2;
                                'slightly interested' = -1;
                                'somewhat interested' = 0;
                                'moderately interested' = 1;
                                'very interested' = 2")
)


# assign somewhat arbitrary probability values (in percent) to the win
# probability answers: very unlikely = 5, moderately unlikely = 25,
# 50/50 = 50, and the mirror images (75 and 95) for the "likely" answers
conjoint$winprob <- as.integer(
  car::recode(conjoint$winprob,
                               "'Very unlikely' = 5;
                               'Moderately unlikely' = 25;
                               '50/50' = 50;
                               'Moderately likely' = 75;
                               'Very likely' = 95")
)



# write out the reshaped, recoded data
saveRDS(conjoint, file = "Modified Data/Clean_reshaped.RDS")
